\subsection{Overview}
\begin{frame}
    \frametitle{Purpose}
    \begin{itemize}
        \item Python libraries that allow for scripted Windows binary inspection
        \item These tools allow for great automation of various analysis tasks
        \item \emph{pefile} is a PE Format Python parsing module
        \item \emph{pydasm} is an \emph{x86} Python disassembler module
            \pedbullet{pydasm is used by PaiMei (we may talk about this later)}
    \end{itemize}
\end{frame}

\begin{frame}
    \frametitle{pefile}
    \begin{block}{}
        \begin{center}
            \url{http://dkbza.org/pefile.html}
        \end{center}
    \end{block}

    \begin{itemize}
        \item \emph{pefile} is a cross-platform pure Python module intended for handling PE Files
        \item Actively maintained by Ero
        \item It should be able to process any file that can be opened by the Windows loader
        \item Usage requires some basic understanding of the layout of a PE file
        \item You can use \emph{pefile} to explore nearly every single feature of a PE file
    \end{itemize}
\end{frame}


\subsection{pefile}
\begin{frame}
    \frametitle{Loading a PE file}
    \begin{itemize}
        \item Loading a file is as easy as creating an instance of the PE class with a path to the PE file
            \begin{block}{}
                \begin{center}
                    \texttt{pe = pefile.PE('path/to/file')}
                \end{center}
            \end{block}
        \item Alternatively, you can pass a PE file as raw data
            \begin{block}{}
                \begin{center}
                    \texttt{pe = pefile.PE(data=python\_string)}
                \end{center}
            \end{block}
    \end{itemize}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Inspecting the Headers}
    \begin{block}<1->{}
        \small
        \begin{semiverbatim}
\uncover{>>> import pefile}
\uncover{}
\uncover{>>> pe = pefile.PE('notepad.exe')}
\uncover{>>> hex(pe.OPTIONAL\_HEADER.ImageBase)}
\uncover{'0x1000000L'}
\uncover{}
\uncover{>>> hex(pe.OPTIONAL\_HEADER.AddressOfEntryPoint)}
\uncover{'0x6AE0L'}
\uncover{}
\uncover{>>> hex(pe.OPTIONAL\_HEADER.NumberOfRvaAndSizes)}
\uncover{'0x10L'}
\uncover{}
\uncover{>>> hex(pe.FILE\_HEADER.NumberOfSections)}
\uncover{'0x3'}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Inspecting the Sections}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> for section in pe.sections:}
\uncover{...  print (section.Name,}
\uncover{       hex(section.VirtualAddress),}
\uncover{       hex(section.Misc_VirtualSize),}
\uncover{       section.SizeOfRawData )}
\uncover{... }
\uncover{('.text', '0x1000L', '0x6D72L', 28160L)}
\uncover{('.data', '0x8000L', '0x1BA8L', 1536L)}
\uncover{('.rsrc', '0xA000L', '0x8948L', 35328L)}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Think it's Packed?}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{import math}
\uncover{}
\uncover{def H(data):}
\uncover{   if not data:}
\uncover{       return 0}
\uncover{}
\uncover{   entropy = 0}
\uncover{   for x in range(256):}
\uncover{       p_x = float(data.count(chr(x)))/len(data)}
\uncover{       if p_x > 0:}
\uncover{           entropy += - p_x*math.log(p_x, 2)}
\uncover{}
\uncover{   return entropy}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Think it's Packed? (Unpacked)}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> for section in pe.sections:}
\uncover{...  print section.Name, H(section.data)}
\uncover{... }
\uncover{.text 6.28370964662}
\uncover{.data 1.39795676336}
\uncover{.rsrc 5.40687515641}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Think it's Packed? (ASPack)}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> pe2 = pefile.PE('notepad-aspack.exe')}
\uncover{>>> for section in pe2.sections:}
\uncover{...  print section.Name, H(section.data)}
\uncover{... }
\uncover{.text 7.98363149339}
\uncover{.data 4.68226874255}
\uncover{.rsrc 6.09026175185}
\uncover{.aspack 5.90609875421}
\uncover{.adata 0}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Think it's Packed? (UPX)}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> pe3 = pefile.PE('notepad-upx.exe')}
\uncover{>>> for section in pe3.sections:}
\uncover{...  print section.Name, H(section.data)}
\uncover{... }
\uncover{UPX0 0}
\uncover{UPX1 7.83028313969}
\uncover{.rsrc 5.59212256596}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{Imports}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> for entry in pe.DIRECTORY_ENTRY_IMPORT:}
\uncover{...   print entry.dll}
\uncover{...   for imp in entry.imports:}
\uncover{...     print '\\t', hex(imp.address), imp.name}
\uncover{... }
\uncover{comdlg32.dll}
\uncover{        0x10012A0L PageSetupDlgW}
\uncover{        0x10012A4L FindTextW}
\uncover{        0x10012A8L PrintDlgExW}
\uncover{[snip]}
\uncover{SHELL32.dll}
\uncover{        0x1001154L DragFinish}
\uncover{        0x1001158L DragQueryFileW}
        \end{semiverbatim}
    \end{block}
\end{frame}


\subsection{pydasm}

\begin{frame}
    \frametitle{pydasm}
    \begin{block}<1->{}
        \begin{center}
            \url{http://dkbza.org/pydasm.html}
        \end{center}
    \end{block}
    \begin{itemize}
        \item \emph{pydasm} is a cross-platform Python module wrapping jt's libdasm
        \item Combined with \emph{pefile}, you get a good base for developing a mini-IDA wannabe
    \end{itemize}
\end{frame}


\begin{frame}[fragile]
    \frametitle{Disassembling}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> import pydasm}
\uncover{>>> i = pydasm.get_instruction('\\x90', pydasm.MODE_32)}
\uncover{>>> pydasm.get_instruction_string(}
\uncover{       i, pydasm.FORMAT_INTEL, 0)}
\uncover{}
\uncover{'nop '}
\uncover{}
\uncover{>>> i = pydasm.get_instruction(}
\uncover{       '\\x8B\\x04\\xBD\\xE8\\x90\\x00\\x01', pydasm.MODE_32)}
\uncover{>>> pydasm.get_instruction_string(}
\uncover{       i, pydasm.FORMAT_INTEL, 0)}
\uncover{}
\uncover{'mov eax,[edi*4+0x10090e8]'}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{The Instruction Object}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> pprint.pprint(dir(i))}
\uncover{['__doc__','__module__',}
\uncover{ 'dispbytes', 'extindex',}
\uncover{ 'flags', 'fpuindex',}
\uncover{ 'immbytes', 'length',}
\uncover{ 'mode','modrm',}
\uncover{ 'op1', 'op2', 'op3',}
\uncover{ 'opcode', 'ptr',}
\uncover{ 'sectionbytes', 'sib',}
\uncover{ 'type']}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{The Operand Object}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> pprint.pprint(dir(i.op1))}
\uncover{['__doc__', '__module__',}
\uncover{ 'basereg', 'dispbytes',}
\uncover{ 'displacement', 'dispoffset',}
\uncover{ 'flags', 'immbytes',}
\uncover{ 'immediate', 'immoffset',}
\uncover{ 'indexreg', 'reg',}
\uncover{ 'scale', 'section',}
\uncover{ 'sectionbytes', 'type']}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{pefile+pydasm}
    \begin{block}{}
        \begin{semiverbatim}
\uncover{>>> ep = pe.OPTIONAL_HEADER.AddressOfEntryPoint}
\uncover{>>> ep_ava = ep+pe.OPTIONAL_HEADER.ImageBase}
\uncover{>>> data = pe.get_memory_mapped_image()[ep:ep+100]}
\uncover{>>> offset = 0}
\uncover{>>> while offset < len(data):}
\uncover{...   i = pydasm.get_instruction(}
\uncover{           data[offset:], pydasm.MODE_32)}
\uncover{...   print pydasm.get_instruction_string(}
\uncover{           i, pydasm.FORMAT_INTEL, ep_ava+offset)}
\uncover{...   offset += i.length}
        \end{semiverbatim}
    \end{block}
\end{frame}

\begin{frame}[fragile]
    \frametitle{pefile+pydasm (Output)}
    \begin{block}{}
    \small
        \begin{semiverbatim}
\uncover{push byte 0x70}
\uncover{push dword 0x1001888}
\uncover{call 0x1006ca8}
\uncover{xor ebx,ebx}
\uncover{push ebx}
\uncover{mov edi,[0x100114c]}
\uncover{call edi}
\uncover{cmp word [eax],0x5a4d}
\uncover{jnz 0x1006b1d}
\uncover{mov ecx,[eax+0x3c]}
\uncover{add ecx,eax}
\uncover{cmp dword [ecx],0x4550}
\uncover{jnz 0x1006b1d}
\uncover{movzx eax,[ecx+0x18]}
        \end{semiverbatim}
    \end{block}
\end{frame}


\subsection{Exercises}
\begin{frame}
    \frametitle{pefile Exercises}
    \begin{itemize}
        \item Load a \emph{DLL} and print the imported and exported symbols
        \item Load a \emph{PE} file and check whether the entry point is in the last section and whether that section is smaller than the others
        \item Load a \emph{PE} file and dump the first 256 bytes, starting from the entry point, to disk
            \pedbullet{Think of a way of using this metadata as a packer or executable signature}
    \end{itemize}
\end{frame}

\begin{frame}
    \frametitle{pydasm Exercises}
    \begin{itemize}
        \item Building on the examples in the slides, create a disassembler that follows references and try to disassemble as far as you can
        \item Create a histogram from the mnemonics of the disassembled code
        \item Think of ways to use the histogram (or more advanced statistical techniques) to build a classifier based on entry point code
    \end{itemize}
\end{frame}
